# ------------------------- #
#    Downweight the data    #
# ------------------------- #

## Clear working space
rm(list = ls())

dir.create("loop_mods", showWarnings = F, recursive = T)

range01 <- function(x){(x-min(x, na.rm = T))/(max(x, na.rm = T)-min(x, na.rm = T))}

dyad_data <- readRDS("intl_order/network_layers_replication.rds")

dyad_data <- distinct(
  dyad_data, 
  ccode1, ccode2, year, .keep_all = T 
) %>% transform(
  mid = cowmidongoing 
) %>% transform(
  fatal_mid = ifelse(mid == 1 & fatality > 0, 1, 0)
) 

dyad_data$contig[dyad_data$mindist == 0] <- 1

unweighted_data <- distinct(
  dyad_data, 
  ccode1, ccode2, year, .keep_all = T
) %>% mutate(
  major_power = cowmaj1  + cowmaj2,
  joint_major_power = if_else(cowmaj1 == 1 & cowmaj2 == 1, 1, 0),
  joint_region = if_else(region1 == region2, 1, 0)
) %>% dplyr::select(
  c(
    ccode1, ccode2, year, contig, 
    pta, bla, dcaV1, dcaV2,
    dipl, igo, 
    atop_defense, atop_offense, atop_neutral, atop_consul,
    joint_region,
    major_power, joint_major_power, cowc1, cowc2,
    fatal_mid, mid, region1, region2
  )
) %>% reshape2::melt(
  id = c(
    "ccode1", "ccode2", "year", "contig", 
    "joint_region", "cowc1", "cowc2",
    "major_power", "joint_major_power", "mid", "fatal_mid", "region1", "region2"
  )
) %>% dplyr::rename(
  layer = variable
) %>% mutate(
  mid_id = paste0(cowc1, "-", cowc2),
  value = if_else(is.na(value), 0, value),
  region_id = paste0(region1, "-", region2)
) 

unweighted_data$contig[is.na(unweighted_data$contig) & unweighted_data$ccode1 == 626 & unweighted_data$ccode2 %in% c(625, 530, 500, 501, 484, 490)] <- 1
unweighted_data$contig[is.na(unweighted_data$contig) & unweighted_data$ccode2 == 626 & unweighted_data$ccode1 %in% c(625, 530, 500, 501, 484, 490)] <- 1
unweighted_data$contig[is.na(unweighted_data$contig) & unweighted_data$ccode1 == 626 & !(unweighted_data$ccode2 %in% c(625, 530, 500, 501, 484, 490))] <- 0
unweighted_data$contig[is.na(unweighted_data$contig) & unweighted_data$ccode2 == 626 & !(unweighted_data$ccode1 %in% c(625, 530, 500, 501, 484, 490))] <- 0
unweighted_data$contig[unweighted_data$year >= 2011 & unweighted_data$ccode1 == 625 & unweighted_data$ccode2 %in% c(500, 501, 490)] <- 0
unweighted_data$contig[unweighted_data$year >= 2011 & unweighted_data$ccode2 == 625 & unweighted_data$ccode1 %in% c(500, 501, 490)] <- 0
sum(is.na(unweighted_data$contig))


max(subset(unweighted_data, contig == 1)$year)
sum(is.na(unweighted_data$major_power))

unweighted_data$major_power <- factor(
  unweighted_data$major_power,
  levels = c("0", "1", "2")
)

input_data <- unweighted_data
input_data$year_sqr <- input_data$year ^ 2

igo_temp <- filter(
  input_data, 
  grepl("igo", layer)
) 

range01_data <- list()
for (i in 1:length(unique(igo_temp$layer)))
{
  temp_subset <- filter(
    igo_temp,
    layer == unique(igo_temp$layer)[i]
  )
  for (z in 1:length(unique(temp_subset$year)))
  {
    if (any(subset(temp_subset, year == unique(temp_subset$year)[z])$value != 0))
    {
      subset_by_year <- filter(
        temp_subset, 
        year == unique(temp_subset$year)[z]
      )
      
      subset_by_year <- transform(
        subset_by_year,
        value = round(range01(value), 2)
      )
      range01_data[[length(range01_data) + 1]] <- subset_by_year
    }
  }
}

igo_temp <- do.call(rbind, range01_data)

input_data <- filter(
  input_data,
  !grepl("igo", layer)
) %>% bind_rows(
  igo_temp
) %>% mutate(
  layer = as.character(layer)
)



layers_to_check <- data.frame(
  expand.grid(
    c(0, 1),
    c(0, 1),
    c(0, 1),
    c(0, 1),
    c(0, 1),
    c(0, 1),
    c(0, 1),
    c(0, 1),
    c(0, 1),
    c(0, 1)
  )
) 


layers_to_check <- layers_to_check[rowSums(layers_to_check) > 0,]
colnames(layers_to_check) <- c("atop_defense",
                               "atop_offense",
                               "atop_consul",
                               "atop_neutral",
                               "bla", 
                               "dipl", 
                               "igo",
                               "pta", 
                               "dcaV2", 
                               "dcaV1"
)
layers_to_check[is.na(layers_to_check)] <- 0

layers_to_check <- filter(
  layers_to_check,
 !(dcaV1 == 1 & dcaV2 == 1),
 atop_defense + atop_offense + igo > 0
)



formula_list <- list(
  ## Fixed 
  form <- as.formula(value ~ contig + year),
  form <- as.formula(value ~ contig + year + major_power),
  form <- as.formula(value ~ contig + year + year_sqr + major_power),
  form <- as.formula(value ~ contig  | year ),
  form <- as.formula(value ~ contig +  joint_region | year ),
  form <- as.formula(value ~ contig | year + cowc1 + cowc2),
  form <- as.formula(value ~ contig + joint_region | year + cowc1 + cowc2),
  form <- as.formula(value ~ contig + major_power | year + cowc1 + cowc2),
  form <- as.formula(value ~ contig + joint_region + major_power | year + cowc1 + cowc2),
  form <- as.formula(value ~ contig | year + cowc1 + cowc2 + region1 + region2),
  form <- as.formula(value ~ contig + major_power | year + cowc1 + cowc2 + region1 + region2),
  NULL
)


outcome_data <- data.frame(
  matrix(0, ncol = 4, nrow  = 0)
)
colnames(outcome_data) <- c("out_group_fmids", "out_group_mids",
                            "layer_combo", "formula_downweight")






input_data <- filter(
  input_data,
  !is.na(value),
  layer %in% colnames(layers_to_check)
)

input_data$layer <- factor(
  input_data$layer, 
  levels = c(
    sort(unique(input_data$layer))
  )
) 

state_id <- distinct(
  input_data,
  ccode1, cowc1
) %>% dplyr::rename(
  ccode = ccode1, 
  cowc = cowc1
)

state_id <- distinct(
  input_data,
  ccode2, cowc2
) %>% dplyr::rename(
  ccode = ccode2, 
  cowc = cowc2
) %>% bind_rows(
  state_id
) %>% distinct(
  ccode, cowc
)

check_existing_output <- NA
for (forms in 1:length(formula_list))
{
  if (!is.null(formula_list[[forms]]))
  {
      layer_summary <- list()
      for (i in 1:length(unique(input_data$layer)))
      {
        if (!any(grepl("\\(", as.character(formula_list[[forms]]))))
        {
          temp_data <- filter(
            input_data,
            layer == unique(input_data$layer)[i]
          ) %>% distinct(
            ccode1, ccode2, year, .keep_all = T
          ) %>% mutate(
            dyad_id = paste0(ccode1, "-", ccode2)
          )
          
          
          years_unique <- data.table::setDT(temp_data)[,.(n = length(unique(value))), by = 'year']
          years_unique <- data.frame(years_unique)
          temp_data <- filter(
            temp_data,
            year %in% years_unique$year[years_unique$n > 1]
          ) %>% data.frame(.)
          
          
          try({layer_model <- fixest::feols(formula_list[[forms]], data = temp_data)})  
        } 
        temp_data$pred_values <- round(predict(layer_model, newdata = temp_data), 2)
        layer_summary[[i]] <- data.frame(temp_data)
      }
      layer_summary_bind <- do.call(bind_rows, layer_summary)
      layer_summary_bind[is.na(layer_summary_bind)] <- 0
      layer_summary_bind$tie <- layer_summary_bind$value - layer_summary_bind$pred_values
      layer_summary_bind$layer <- as.character(layer_summary_bind$layer)
    } else {
      layer_summary <- list()
      for (nulls in 1:length(unique(input_data$layer)))
      {
        temp_data <- filter(
          input_data,
          layer == unique(input_data$layer)[nulls]
        ) %>% distinct(
          ccode1, ccode2, year, .keep_all = T
        ) %>% mutate(
          dyad_id = paste0(ccode1, "-", ccode2)
        )
        
        
        years_unique <- data.table::setDT(temp_data)[,.(n = length(unique(value))), by = 'year']
        years_unique <- data.frame(years_unique)
        layer_summary[[nulls]] <- filter(
          temp_data,
          year >= min(years_unique$year[years_unique$n > 1])
        ) %>% data.frame(.)
      }
      layer_summary_bind <- do.call(bind_rows, layer_summary)
      layer_summary_bind[is.na(layer_summary_bind)] <- 0
      layer_summary_bind$tie <- layer_summary_bind$value 
      layer_summary_bind$layer <- as.character(layer_summary_bind$layer)
  }
  
    
    for (layers in 1:nrow(layers_to_check))
    {
      
      downweighted_data <- filter(
        layer_summary_bind, 
        layer %in%
          c(
            as.character(colnames(layers_to_check)[layers_to_check[layers,] == 1])
          )
      ) %>% group_by(
        year
      ) %>% mutate(
        n = length(unique(layer))
      ) %>% ungroup(
        .
      ) %>% mutate(
        tie = tie/n,
        tie = round(tie, 2)
      )
      
      downweighted_data <- data.table::setDT(downweighted_data)[,.(tie = sum(tie)), by = 'ccode1,ccode2,year']
      downweighted_data <- left_join(
        downweighted_data,
        state_id,
        by = c(
          "ccode1" = "ccode"
        )
      ) %>% left_join(
        state_id,
        by = c(
          "ccode2" = "ccode"
        )
      ) %>% dplyr::rename(
        namea = cowc.x, 
        nameb = cowc.y
      ) %>% dplyr::select(
        namea, nameb, year, tie
      ) %>% data.frame(.)
        
      
      adjacency_matricies <- list()
      for (v in min(layer_summary_bind$year):max(layer_summary_bind$year))
      {
        temp_edgelist <- filter(
          downweighted_data,
          year == v
        )
        
        temp_edgelist <- arrange(
          temp_edgelist,
          namea, nameb
        ) %>% distinct(
          namea, nameb, tie
        )
        
        temp_edgelist$tie[temp_edgelist$tie < 0] <- 0
        
        if (nrow(temp_edgelist) > 0)
        {
          temp_net <- igraph::graph_from_edgelist(as.matrix(temp_edgelist[,c(1:2)]), directed = F)
          igraph::E(temp_net)$weight <- round(temp_edgelist$tie)
          
          temp_adj <- igraph::as_adjacency_matrix(temp_net, attr="weight", sparse = T)
          temp_adj <- as.matrix(temp_adj)
          temp_adj <- temp_adj[order(rownames(temp_adj)),order(colnames(temp_adj))]  
          adjacency_matricies[[length(adjacency_matricies) + 1]] <- as.matrix(temp_adj)
        } else {
         
          temp_adj <- matrix(0, 
                                nrow = nrow(adjacency_matricies[[length(adjacency_matricies)]]), 
                                ncol = nrow(adjacency_matricies[[length(adjacency_matricies)]]))
          rownames(temp_adj) <- colnames(temp_adj) <- colnames(adjacency_matricies[[length(adjacency_matricies)]])
          adjacency_matricies[[length(adjacency_matricies) + 1]] <- temp_adj
        }
        
        
      }
      
      set.seed(9733)   
      community.blondel <- list()
      for (com in 1:length(adjacency_matricies))
      {
        try({g <- igraph::graph_from_adjacency_matrix(adjacency_matricies[[com]], mode = "undirected")
        
        comm <- igraph::cluster_louvain(
            g 
            )  
        
        
        community.blondel[[com]] <- data.frame(
            names = comm$names,
            group = comm$membership
        )  
        
        
        
        rm(comm)
        }, silent = T)
      }
      
      
      for (i in 1:length(community.blondel))
      {
        community.blondel[[i]]$year <- i + 1815
        if (i != 1)
        {
          community.blondel[[i]]$group <- community.blondel[[i]]$group + max(community.blondel[[i - 1]]$group) 
        }
      }
      
      community.blondel <- do.call(bind_rows, community.blondel)
      community.blondel$group <- as.numeric(as.factor(community.blondel$group))
      
      max_year <- data.table::setDT(community.blondel)[,.(max_year = max(year)), by = "group"]
      max_year <- arrange(
        max_year, 
        max_year
      ) %>% data.frame(.)
      community.blondel <- data.frame(community.blondel)
      group_names <- unique(community.blondel$group)
      edgelist <- data.frame(from = NA, to = NA, weight = NA)
      
      for (i in 1:nrow(max_year))
      {
        names_temp <- subset(community.blondel, group == max_year$group[i])$names
        year_temp  <- max_year$max_year[i] + 1
        temp_data <- subset(community.blondel, names %in% names_temp & year == year_temp)
        from <- max_year$group[i]
        to <- unique(temp_data$group)
        weights <- c()
        
        if (length(to) > 0)
        {
          for (z in 1:length(to))
          {
            union_data <- union(names_temp, subset(temp_data, group == to[z])$names)
            sender_vec <- if_else(union_data %in% names_temp, 1, 0)
            reciever_vec <- if_else(union_data %in% subset(temp_data, group == to[z])$names, 1, 0)
            
            weights[z] <- length(intersect(names_temp, subset(temp_data, group == to[z])$names))/length(union_data)
          }
          edgelist <- bind_rows(
            edgelist, 
            data.frame(
              from = rep(from, length(to)), 
              to = to, 
              weight = weights
            )
          ) %>% filter(
            !is.na(from)
          )
        }
      }
      edgelist$weight <- round(edgelist$weight, 2)
      group_matrix <- igraph::graph.data.frame(edgelist, directed = F)
      group_matrix <- igraph::as_adj(group_matrix, sparse = F)
      group_matrix <- group_matrix[ order(row.names(group_matrix)),order(colnames(group_matrix))]
      diag(group_matrix) <- 2
      group_matrix[is.na(group_matrix)] <- 0
      group_matrix[group_matrix < 0] <- 0
      
      
      g <- igraph::graph_from_adjacency_matrix(group_matrix, mode = "undirected", weighted = T)
      set.seed(477)
      comm <- igraph::cluster_louvain(g, weights = igraph::E(g)$weight)
      between.group <- data.frame(
        group = comm$names,
        interlayer = comm$membership
      )
      
      
      
      community.blondel <- community.blondel[,!(colnames(community.blondel) %in% c("interlayer", "n"))]
      community.blondel <- mutate(
        community.blondel,
        group = as.character(group)
      ) %>% left_join(
        between.group,
        by = c(
          "group"
        )
      ) 
      
      war.data.temp.test <- distinct(
        unweighted_data, 
        cowc1, cowc2, year, contig, fatal_mid, .keep_all = T
      ) %>% left_join(
        community.blondel,
        by = c(
          "cowc1" = "names", "year"
        )
      ) %>% left_join(
        community.blondel,
        by = c(
          "cowc2" = "names", "year"
        )
      ) %>% transform(
        dyad = paste(cowc1, cowc2, sep = "-"),
        contig = if_else(contig %in% 1:2, 1, 0),
        year_sqr = year ^ 2,
        out_group = if_else(interlayer.x != interlayer.y, 1, 0)
      )
      
      
      try({mode_temp <- alpaca::feglm(fatal_mid ~ out_group | ccode1 + ccode2 + year + dyad, data = war.data.temp.test)
      temp_outcome_data <- data.frame(
        out_group_fmids = try({coef(mode_temp)[1]}),     
        out_group_mids = try({summary(mode_temp, cluster = c("ccode1", "ccode2", "year", "dyad"))[[1]][4]}),
        layer_combo = layers, 
        formula_downweight = forms
      )
      
      outcome_data <- bind_rows(
        outcome_data,
        temp_outcome_data
      ) %>% distinct(
        layer_combo, formula_downweight, .keep_all = T
      )})
      rownames(outcome_data) <- NULL
      cat(paste0("\rFormula: ", forms, ", layer combo: ", layers))
    }
    saveRDS(outcome_data, file = paste0("loop_mods/outcome_data_", forms, ".rds"))
}
